home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_100
/
192_01
/
bawk.c
< prev
next >
Wrap
Text File
|
1980-01-01
|
12KB
|
640 lines
/*
* Bawk main program
*/
#define MAIN 1
#include <stdio.h>
#include "bawk.h"
/*
* Main program
*/
int main( argc, argv )
int argc;
char **argv;
{
	/*
	 * Program entry point.
	 *
	 * Command line handling, as implemented below:
	 *   - an argument starting with '-' is an option: a lone "-" selects
	 *     standard input for the position in which it appears, "-d"
	 *     (DEBUG builds only) raises the debug level, anything else
	 *     calls usage();
	 *   - the first file argument is compiled as the rules file;
	 *   - every later file argument is processed as a text file.  The
	 *     BEGIN action is run once, just before the first text file is
	 *     opened.
	 * When no text file was named, standard input is processed instead.
	 * The END action is run last.  Returns 0 on normal completion.
	 */
	char gotrules, didfile, getstdin;

	getstdin = 0;
	didfile = 0;
	gotrules = 0;

	/*
	 * Initialize global variables.  Each one is cleared with its own
	 * assignment: a chained "a = b = 0" across pointer and integer
	 * objects assigns an int-typed value to a pointer, which ISO C
	 * rejects.
	 */
	Beginact = 0;
	Endact = 0;
	Rules = 0;
	Rulep = 0;
#ifdef DEBUG
	Debug = 0;
#endif
	Filename = 0;
	Linecount = 0;
	Saw_break = 0;
	Stackptr = Stackbtm - 1;
	Stacktop = Stackbtm + MAXSTACKSZ;
	Nextvar = Vartab;
	strcpy( Fieldsep, " \t" );
	strcpy( Recordsep, "\n" );

	/*
	 * Parse command line
	 */
	while ( --argc )
	{
		++argv;
		if ( **argv == '-' )
		{
			/*
			 * Process dash options.  The dash is stepped over in a
			 * statement of its own so that a macro implementation of
			 * tolower() cannot apply the increment twice; the cast
			 * keeps the argument in the range <ctype.h> requires.
			 */
			++(*argv);
			switch ( tolower( (unsigned char) **argv ) )
			{
#ifdef DEBUG
			case 'd':
				++Debug;
				continue;
#endif
			case 0:
				/*
				 * Lone "-": treat this argument as a file that
				 * is read from standard input.  argv is left
				 * where it is - backing it up would make the
				 * next pass re-read this (now empty) argument
				 * as a file name and lose the last argument.
				 */
				++getstdin;
				break;
			default:
				usage();
				continue;
			}
		}

		if ( gotrules )
		{
			/*
			 * Already read rules file - assume this is
			 * a text file for processing.
			 */
			if ( ++didfile == 1 && Beginact )
				doaction( Beginact );
			if ( getstdin )
			{
				--getstdin;
				newfile( 0 );
			}
			else
				newfile( *argv );
			process();
		}
		else
		{
			/*
			 * First file name argument on command line
			 * is assumed to be a rules file - attempt to
			 * compile it.
			 */
			if ( getstdin )
			{
				--getstdin;
				newfile( 0 );
			}
			else
				newfile( *argv );
			compile();
			gotrules = 1;
		}
	}

	/* NOTE(review): usage() presumably does not return - confirm. */
	if ( !gotrules )
		usage();

	if ( !didfile )
	{
		/*
		 * Didn't process any files yet - process stdin.
		 */
		newfile( 0 );
		if ( Beginact )
			doaction( Beginact );
		process();
	}

	if ( Endact )
		doaction( Endact );

	return 0;
}
/*
* Regular expression/action file compilation routines.
*/
compile()
{
	/*
	 * Translate the rules file currently open for input into the
	 * Rules chain.  Input is consumed one character at a time through
	 * getcharacter() until it returns -1; endfile() is called at the end.
	 *
	 * What the first significant character of each item selects:
	 *   blank, tab, newline  - ignored
	 *   '#'                  - comment, discarded up to end of line
	 *   '{'                  - an action, compiled by act_compile()
	 *   ','                  - the stop half of a two-part pattern
	 *   anything else        - a start pattern, compiled by
	 *                          pat_compile(); the BEGIN and END tokens
	 *                          introduce the special actions instead
	 *
	 * Every compiled item is produced in the global Workbuf and then
	 * copied into memory obtained from getmem().
	 */
	int ch, nbytes;

#ifdef DEBUG
	if ( Debug )
		error( "compiling...", 0 );
#endif

	for ( ;; )
	{
		ch = getcharacter();
		if ( ch == -1 )
			break;

		switch ( ch )
		{
		case ' ':
		case '\t':
		case '\n':
			/* white space between items carries no meaning */
			break;

		case '#':
			/*
			 * Comment - throw characters away until a newline
			 * or the end of the input shows up.
			 */
			do
				ch = getcharacter();
			while ( ch != -1 && ch != '\n' );
			break;

		case '{':
#ifdef DEBUG
			if ( Debug )
				error( "action", 0 );
#endif
			/*
			 * Give the brace back so that act_compile() sees
			 * the whole action, and tokenize it into Workbuf.
			 */
			ungetcharacter( '{' );
			nbytes = act_compile( Workbuf );

			/*
			 * The current rule already owns an action, so
			 * this one opens a new, zero-filled rule.
			 */
			if ( Rulep && Rulep->action )
			{
				Rulep->nextrule = getmem( sizeof( *Rulep ) );
				Rulep = Rulep->nextrule;
				fillmem( Rulep, sizeof( *Rulep ), 0 );
			}
			/*
			 * No rule at all yet - this action starts the
			 * chain.
			 */
			if ( !Rulep )
			{
				Rules = Rulep = getmem( sizeof( *Rulep ) );
				fillmem( Rulep, sizeof( *Rulep ), 0 );
			}

			Rulep->action = getmem( nbytes );
			movemem( Workbuf, Rulep->action, nbytes );
			break;

		case ',':
#ifdef DEBUG
			if ( Debug )
				error( "stop pattern", 0 );
#endif
			/*
			 * Second half of a two-part pattern.  The comma has
			 * been consumed already; a start pattern must be in
			 * place and a stop pattern must not be.
			 */
			if ( !( Rulep && Rulep->pattern.start ) )
				error( "stop pattern without a start",
					RE_ERROR );
			if ( Rulep->pattern.stop )
				error( "already have a stop pattern",
					RE_ERROR );

			nbytes = pat_compile( Workbuf );
			Rulep->pattern.stop = getmem( nbytes );
			movemem( Workbuf, Rulep->pattern.stop, nbytes );
			break;

		default:
			/*
			 * Anything else is taken to begin a regular
			 * expression pattern.
			 */
#ifdef DEBUG
			if ( Debug )
				error( "start pattern", 0 );
#endif
			ungetcharacter( ch );
			nbytes = pat_compile( Workbuf );

			if ( *Workbuf == T_BEGIN )
			{
				/*
				 * "BEGIN" keyword - the action that follows
				 * is kept in the Beginact buffer.
				 */
				nbytes = act_compile( Workbuf );
				Beginact = getmem( nbytes );
				movemem( Workbuf, Beginact, nbytes );
			}
			else if ( *Workbuf == T_END )
			{
				/*
				 * "END" keyword - the action that follows
				 * is kept in the Endact buffer.
				 */
				nbytes = act_compile( Workbuf );
				Endact = getmem( nbytes );
				movemem( Workbuf, Endact, nbytes );
			}
			else
			{
				/*
				 * Ordinary pattern.  When a rule exists
				 * already, chain a new zero-filled one
				 * behind it ...
				 */
				if ( Rulep )
				{
					Rulep->nextrule =
						getmem( sizeof( *Rulep ) );
					Rulep = Rulep->nextrule;
					fillmem( Rulep, sizeof( *Rulep ), 0 );
				}
				/*
				 * ... otherwise this pattern starts the
				 * chain.
				 */
				if ( !Rulep )
				{
					Rules = Rulep =
						getmem( sizeof( *Rulep ) );
					fillmem( Rulep, sizeof( *Rulep ), 0 );
				}

				if ( Rulep->pattern.start )
					error( "already have a start pattern",
						RE_ERROR );
				Rulep->pattern.start = getmem( nbytes );
				movemem( Workbuf, Rulep->pattern.start,
					nbytes );
			}
			break;
		}
	}

	endfile();
}
/*
* Text file main processing loop.
*/
process()
{
	/*
	 * Run the compiled rules over the current input file.
	 *
	 * Recordcount is reset to 0, then for as long as getline() returns
	 * non-zero the contents of Linebuf are split into Fields by parse()
	 * and every rule in the Rules chain is applied in order:
	 *   - a rule without a start pattern runs its action every time;
	 *   - a rule whose start pattern matches runs its action and, when
	 *     it also has a stop pattern, keeps running it on the following
	 *     records up to and including the one matching the stop pattern.
	 * The field strings handed out by parse() are freed before the next
	 * record is read.
	 *
	 * The global Rulep is used as the rule cursor and is 0 once the
	 * chain has been walked.
	 */
#ifdef DEBUG
	int i;

	if ( Debug )
		error( "processing...", 0 );
#endif
	Recordcount = 0;

	while ( getline() )
	{
		/*
		 * Parse the input line.
		 */
		Fieldcount = parse( Linebuf, Fields, Fieldsep );
#ifdef DEBUG
		if ( Debug>1 )
		{
			printf( "parsed %d words:\n", Fieldcount );
			for(i=0; i<Fieldcount; ++i )
				printf( "<%s>\n", Fields[i] );
		}
#endif
		/*
		 * Test the cursor before using it: the chain is empty when
		 * the rules file held nothing but BEGIN/END actions (or
		 * nothing at all), and there is then no rule to look at.
		 */
		for ( Rulep = Rules; Rulep; Rulep = Rulep->nextrule )
		{
			if ( ! Rulep->pattern.start )
			{
				/*
				 * No pattern given - perform action on
				 * every input line.
				 */
				doaction( Rulep->action );
			}
			else if ( Rulep->pattern.startseen )
			{
				/*
				 * Start pattern already found - perform
				 * action then check if line matches
				 * stop pattern.
				 */
				doaction( Rulep->action );
				if ( dopattern( Rulep->pattern.stop ) )
					Rulep->pattern.startseen = 0;
			}
			else if ( dopattern( Rulep->pattern.start ) )
			{
				/*
				 * Matched start pattern - perform action.
				 * If a stop pattern was given, set "start
				 * pattern seen" flag and process every input
				 * line until stop pattern found.
				 */
				doaction( Rulep->action );
				if ( Rulep->pattern.stop )
					Rulep->pattern.startseen = 1;
			}
		}

		/*
		 * Release memory allocated by parse().
		 */
		while ( Fieldcount )
			free( Fields[ --Fieldcount ] );
	}
}
/*
* Miscellaneous functions
*/
parse( str, wrdlst, delim )
char *str;
char *wrdlst[];
char *delim;
{
	/*
	 * Split "str" into words and store them in "wrdlst".
	 * A word is a run of characters containing none of the characters
	 * of "delim" (membership is decided by instr()); any number of
	 * delimiter characters may sit between two words.
	 * Returns the number of words stored.
	 *
	 * Each word lives in its own buffer obtained from getmem(), which
	 * the caller has to release again (the original note here says the
	 * memory comes from malloc() and should be returned with free()).
	 * No limit on the number of words is checked here, so "wrdlst" has
	 * to be large enough for whatever "str" holds.
	 */
	char token[ MAXLINELEN ];
	int nwords, n;

	for ( nwords = 0; *str; ++nwords )
	{
		/* step over the delimiters in front of the next word */
		while ( instr( *str, delim ) )
			++str;
		if ( *str == 0 )
			break;

		/* gather the word itself */
		for ( n = 0; *str && !instr( *str, delim ); ++str )
			token[ n++ ] = *str;
		token[ n ] = 0;

		/*
		 * NOTE: every word gets a full MAXLINELEN sized buffer,
		 * just in case the user wants to copy a larger string
		 * into a field.
		 */
		wrdlst[ nwords ] = getmem( MAXLINELEN );
		strcpy( wrdlst[ nwords ], token );
	}

	return nwords;
}
unparse( wrdlst, wrdcnt, str, delim )
char *wrdlst[];
int wrdcnt;
char *str;
char *delim;
{
/*
* Replace all the words in "str" with the words in "wrdlst",
* maintaining the same word separation distance as found in
* the string.
* A "word" is a sequence of characters delimited by